# Dataset found at https://www.kaggle.com/datasets/maartenvandevelde/marine-litter-watch-19502021
# Importing libraries
import pandas as pd
import numpy as np
from datetime import date
import plotly.express as px
import plotly.graph_objects as go
# Removes the 'SettingWithCopyWarning' that will show when running code later on in the project
# NOTE(review): globally silencing chained-assignment warnings can hide genuine
# copy-vs-view bugs — consider writing via .loc instead of suppressing the warning
pd.options.mode.chained_assignment = None
# Expand the width of the Jupyter notebook
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
# Importing datasets: the raw survey tallies and the litter-code metadata lookup
data_path = r"C:\Users\jksmi\Documents\Marine_Litter\MLW_Data.csv"
meta_path = r"C:\Users\jksmi\Documents\Marine_Litter\MLW_Meta.csv"
# The main data file is Latin-1 encoded (contains accented beach/place names)
marine_data = pd.read_csv(data_path, encoding='iso-8859-1')
marine_meta = pd.read_csv(meta_path)
# View marine_data dataframe
# One row per beach cleanup/monitoring event; the G-prefixed columns hold
# per-litter-type tallies (NaN where no tally was recorded for that event)
marine_data
| CommunityName | BeachName | BeachCountrycode | BeachRegionalSea | BeachLength_m | BeachLocation | BeachType | EventDate | EventType | NatRef | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20160310 | Cleanup | NaN | ... | 2.0 | 4.0 | NaN | 3.0 | NaN | NaN | 3.0 | NaN | 1.0 | NaN |
| 1 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20150902 | Cleanup | NaN | ... | 1.0 | NaN | 2.0 | NaN | NaN | NaN | 7.0 | 3.0 | NaN | NaN |
| 2 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20151203 | Cleanup | NaN | ... | NaN | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | 2.0 | 1.0 |
| 3 | gBqsPxAZ | krapetz | BG | Black Sea | 5850 | Rural | Sandy | 20151008 | Cleanup | NaN | ... | 5.0 | NaN | 2.0 | NaN | NaN | NaN | 5.0 | 26.0 | NaN | NaN |
| 4 | Legambiente Onlus | San Mauro Cilento (SA) | IT | Unknown | 100 | NaN | NaN | 20180428 | Monitoring | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | 2.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | Legambiente Onlus | Spiaggia di Marina di Montemarciano, Ancona (AN) | IT | Mediterranean Sea | 100 | NaN | NaN | 20160505 | Monitoring | NaN | ... | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
| 3409 | Surfrider Foundation Europe | La Lagune, 33260 La Teste de Buch, FRANCE | FR | North-east Atlantic Ocean | 548 | NaN | NaN | 20180602 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3410 | Surfers Against Sewage | Bournemouth beach | UK | North-east Atlantic Ocean | 2302 | Urban | Sandy | 20150328 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | NaN | NaN | NaN |
| 3411 | ocean-R-evolution.com | cleanuptravels.com | CH | NaN | 22 | Rural | Sandy | 20170315 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3412 | Marnoba | Las Salinas de Cabo de Gata, Almería | ES | Mediterranean Sea | 100 | Near river mouth | Sandy | 20130923 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3413 rows × 176 columns
# View marine_meta dataframe
# Lookup table mapping each litter general code (G1, G3, ...) to its
# category (Plastic, Metal, ...) and a human-readable item name
marine_meta
| generalcode | category | generalname | |
|---|---|---|---|
| 0 | G1 | Plastic | 4/6-pack yokes, six-pack rings |
| 1 | G3 | Plastic | Shopping Bags incl. pieces |
| 2 | G4 | Plastic | Small plastic bags, e.g. freezer bags incl. pi... |
| 3 | G5 | Plastic | Plastic bags collective role; what remains fro... |
| 4 | G7 | Plastic | Drink bottles <=0.5l |
| ... | ... | ... | ... |
| 159 | G210 | Glass/ceramics | Other glass items |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... |
| 161 | G213 | Chemicals | Paraffin/Wax |
| 162 | G301 | Cloth/textile | Disposable mask |
| 163 | G302 | Cloth/textile | Disposable glove |
164 rows × 3 columns
# Checking for duplicates in the marine_meta dataset
# duplicated() flags every repeat of an earlier row; select those rows for inspection
meta_dup_mask = marine_meta.duplicated()
marine_meta.loc[meta_dup_mask]
| generalcode | category | generalname |
|---|
# No duplicates found in the marine_meta dataset
# Find duplicates in marine data
# Flag rows that fully repeat an earlier row and display them for inspection
data_dup_mask = marine_data.duplicated()
marine_data.loc[data_dup_mask]
| CommunityName | BeachName | BeachCountrycode | BeachRegionalSea | BeachLength_m | BeachLocation | BeachType | EventDate | EventType | NatRef | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 390 | Swiss Litter Report | Murg_Waengi_StrickerR | CH | NaN | 13 | Urban | Pebbels | 20171019 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 439 | Surfrider Foundation Europe | Avenue Salvador Allende, 13500 Martigues, France | FR | Mediterranean Sea | 81 | NaN | NaN | 20150531 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 | NaN | NaN | NaN |
| 839 | Surfrider Foundation Europe | Viale Nettuno Lido di Metaponto, 75012 Lido di... | IT | Mediterranean Sea | 86 | NaN | NaN | 20150426 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 100.0 | NaN | NaN | NaN |
| 1400 | Surfrider Foundation Europe | 6178 Avenue de la Plage, 62231 Sangatte, France | FR | North-east Atlantic Ocean | 69 | NaN | NaN | 20160319 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 35.0 | NaN | NaN | NaN |
| 1998 | Surfrider Foundation Europe | via luigi sturzo, marina di Gioiosa Jonica RC,... | IT | Mediterranean Sea | 89 | NaN | NaN | 20150322 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 194.0 | NaN | NaN | NaN |
| 2203 | Surfrider Foundation Europe | 220-462 Route Departementale 940, 62200 Boulog... | FR | North-east Atlantic Ocean | 70 | NaN | NaN | 20150326 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 4.0 | NaN | NaN | NaN |
| 2298 | Surfrider Foundation Europe | AS-317 Muros de Nal?n, 33138 Muros de Nal?n, E... | ES | North-east Atlantic Ocean | 81 | NaN | NaN | 20150321 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2299 | Surfrider Foundation Europe | 15 Avenue de la Grande Plage, 40510 Seignosse,... | FR | North-east Atlantic Ocean | 80 | NaN | NaN | 20150308 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 20.0 | NaN | NaN | NaN |
| 2537 | Marnoba | Cala de Enmedio, Almería | ES | Mediterranean Sea | 100 | Rural | Rocky | 20160822 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2566 | Marnoba | El Saler, Valencia/València | ES | Mediterranean Sea | 100 | Near river mouth | Sandy | 20171114 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3007 | Surfrider Foundation Europe | Beg an Dorchen Plomeur, 29120 Plomeur, France | FR | North-east Atlantic Ocean | 74 | NaN | NaN | 20150405 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 150.0 | NaN | NaN | NaN |
| 3241 | Surfrider Foundation Europe | 32 Via Cartagine, 00058 Santa Marinella, Italie | IT | Mediterranean Sea | 83 | NaN | NaN | 20150322 | Cleanup | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 30.0 | NaN | NaN | NaN |
12 rows × 176 columns
# Checking the shape of the marine_data dataframe before removing duplicates
# Returns (row_count, column_count)
marine_data.shape
(3413, 176)
# Dropping duplicate values in dataset
# NOTE(review): keep=False discards *every* copy of a duplicated row — 24 rows
# removed (3413 -> 3389), not just the 12 extras flagged above. Confirm this is
# intentional (treating duplicated events as unreliable) rather than keep='first'.
marine_data = marine_data.drop_duplicates(keep=False)
# Checking the shape of the marine_data dataframe after duplicates removed
# Expect 24 fewer rows than before (both copies of each duplicated row dropped)
marine_data.shape
(3389, 176)
# Checking datatypes of the non-numerical columns in the marine_data dataframe
# Column-label slice: every column from CommunityName through NatRef inclusive
marine_data.loc[:,'CommunityName':'NatRef'].dtypes
CommunityName object BeachName object BeachCountrycode object BeachRegionalSea object BeachLength_m int64 BeachLocation object BeachType object EventDate int64 EventType object NatRef object dtype: object
# Changing EventDate to datetime datatype (EventDate is currently an int64 in YYYYMMDD form)
marine_data = marine_data.assign(
    EventDate=pd.to_datetime(marine_data['EventDate'], format="%Y%m%d")
)
# Finding the number of NaN values in each column of the marine_data dataframe
# to_string() prevents pandas from truncating the long column list in the output
nan_counts = marine_data.isna().sum()
print(nan_counts.to_string())
CommunityName 0 BeachName 0 BeachCountrycode 249 BeachRegionalSea 1111 BeachLength_m 0 BeachLocation 1729 BeachType 1707 EventDate 0 EventType 0 NatRef 3275 lon_x1 0 lat_y1 0 lon_x2 0 lat_y2 0 G1 3003 G3 964 G4 1550 G5 2986 G7 899 G8 1349 G9 2849 G10 1083 G11 2994 G12 2941 G13 2251 G14 3152 G15 3249 G16 3122 G17 3069 G18 3114 G19 3218 G21 695 G22 2212 G23 2016 G24 2104 G25 2081 G26 1713 G27 733 G28 2365 G29 3098 G30 802 G31 1714 G32 1894 G33 1435 G34 1604 G35 1163 G36 3118 G37 2460 G40 3195 G41 3192 G42 3234 G43 3137 G44 3138 G45 2889 G46 3176 G47 3091 G49 2221 G50 1146 G52 2391 G53 2707 G54 3067 G56 2608 G57 3114 G58 3209 G59 2015 G60 3128 G62 3091 G63 3222 G64 3294 G65 2512 G66 2079 G67 1970 G68 3144 G69 3216 G70 2024 G71 2045 G72 3132 G73 2416 G76 2838 G77 3293 G79 1045 G80 2191 G82 1131 G83 2428 G84 3209 G85 3121 G86 3116 G87 2293 G88 3113 G89 2254 G90 2512 G91 2469 G92 3178 G93 2426 G95 1446 G96 2184 G97 3202 G98 3191 G99 2460 G100 1971 G101 2597 G102 3122 G124 1561 G125 1949 G126 2483 G127 3316 G128 2589 G129 3301 G130 3166 G131 2540 G132 3362 G133 2575 G134 2195 G137 1996 G138 2504 G139 3237 G140 3312 G141 3294 G142 2320 G143 3322 G144 2304 G145 1494 G147 2574 G148 3009 G150 3095 G151 2445 G152 2119 G153 2433 G154 2534 G155 2590 G156 2130 G158 1976 G159 2321 G160 2549 G161 2963 G162 3261 G163 3250 G164 3232 G165 2473 G166 3215 G167 2604 G171 2121 G172 2369 G174 3052 G175 1238 G176 2350 G177 2000 G178 1665 G179 2636 G180 3278 G181 3256 G182 2644 G184 3295 G186 3249 G187 3326 G188 2699 G189 3333 G190 3291 G191 2445 G193 3307 G194 2647 G195 2710 G198 1742 G199 2465 G200 1453 G201 3178 G202 3226 G203 2585 G204 2145 G205 3336 G206 3265 G207 3270 G208 1329 G210 2406 G211 2439 G213 2326
# Finding the totals of each unique value in the BeachRegionalSea column,
# keeping NaN as its own bucket so missing values are visible
marine_data.BeachRegionalSea.value_counts(dropna=False)
NaN 1111 Mediterranean Sea 994 North-east Atlantic Ocean 810 Black Sea 268 Unknown 110 Baltic Sea 96 Name: BeachRegionalSea, dtype: int64
# BeachCountrycode to BeachRegionalSea groupings and counts
# sort=False keeps pairs in key order; dropna=False keeps NaN pairings visible
pair_counts = marine_data[['BeachCountrycode', 'BeachRegionalSea']].value_counts(dropna=False, sort=False)
print(pair_counts.to_string())
BeachCountrycode BeachRegionalSea
BA Mediterranean Sea 1
NaN 1
BE North-east Atlantic Ocean 4
BG Black Sea 74
CH Unknown 1
NaN 982
CV Unknown 4
CY Mediterranean Sea 8
DE Baltic Sea 5
NaN 84
DK Baltic Sea 69
North-east Atlantic Ocean 187
DZ Mediterranean Sea 2
EE Baltic Sea 2
EL Mediterranean Sea 1
ES Mediterranean Sea 192
North-east Atlantic Ocean 163
Unknown 5
FI Baltic Sea 1
FR Mediterranean Sea 63
North-east Atlantic Ocean 238
NaN 5
GE Black Sea 20
GR Mediterranean Sea 67
HR Mediterranean Sea 7
IE North-east Atlantic Ocean 42
IL Mediterranean Sea 3
IT Mediterranean Sea 494
Unknown 11
Int Mediterranean Sea 1
North-east Atlantic Ocean 2
NaN 20
LB Mediterranean Sea 1
MA Mediterranean Sea 1
NaN 2
ME Mediterranean Sea 1
MT Mediterranean Sea 2
NL North-east Atlantic Ocean 10
NO North-east Atlantic Ocean 2
PT North-east Atlantic Ocean 25
RO Black Sea 145
RU Baltic Sea 9
Black Sea 11
Unknown 1
SE Baltic Sea 2
North-east Atlantic Ocean 3
SI Mediterranean Sea 77
TN Mediterranean Sea 4
TR Black Sea 9
Mediterranean Sea 11
UA Black Sea 6
UK North-east Atlantic Ocean 55
ZA NaN 4
NaN Baltic Sea 8
Black Sea 3
Mediterranean Sea 58
North-east Atlantic Ocean 79
Unknown 88
NaN 13
# Handling NaN values in the BeachRegionalSea column.
#
# BUG FIX: the original code used Series.mask(countrycode == X, value), which
# overwrote EVERY row for the country — not just the NaNs — and the 'CH' call
# wrote 'Mediterranean Sea' even though its own comment (and geography:
# Switzerland is landlocked) says 'Unknown'. That clobbered CH's one existing
# 'Unknown' entry and mislabelled 982 Swiss rows. We now fill only missing
# values, using a country-code -> regional-sea lookup.
country_to_sea = {
    'BA': 'Mediterranean Sea',  # Bosnia and Herzegovina
    'CH': 'Unknown',            # Switzerland is landlocked
    'DE': 'Baltic Sea',         # Germany
    'MA': 'Mediterranean Sea',  # Morocco
}
missing_sea = marine_data['BeachRegionalSea'].isna()
marine_data.loc[missing_sea, 'BeachRegionalSea'] = (
    marine_data.loc[missing_sea, 'BeachCountrycode'].map(country_to_sea)
)
# Fill remaining NaN values in BeachRegionalSea with 'Unknown'
# (also avoids deprecated inplace=True on mask/replace and the removed np.NaN alias)
marine_data['BeachRegionalSea'] = marine_data['BeachRegionalSea'].fillna('Unknown')
marine_data['BeachRegionalSea'].value_counts(dropna=False)
Mediterranean Sea 1980 North-east Atlantic Ocean 810 Black Sea 268 Baltic Sea 180 Unknown 151 Name: BeachRegionalSea, dtype: int64
# BeachCountrycode to BeachRegionalSea groupings and counts (with BeachRegionalSea NaN values handled)
# Sanity check: no NaN should remain in the BeachRegionalSea half of each pair
marine_data[['BeachCountrycode','BeachRegionalSea']].value_counts(dropna=False,sort=False)
BeachCountrycode BeachRegionalSea
BA Mediterranean Sea 2
BE North-east Atlantic Ocean 4
BG Black Sea 74
CH Mediterranean Sea 983
CV Unknown 4
CY Mediterranean Sea 8
DE Baltic Sea 89
DK Baltic Sea 69
North-east Atlantic Ocean 187
DZ Mediterranean Sea 2
EE Baltic Sea 2
EL Mediterranean Sea 1
ES Mediterranean Sea 192
North-east Atlantic Ocean 163
Unknown 5
FI Baltic Sea 1
FR Mediterranean Sea 63
North-east Atlantic Ocean 238
Unknown 5
GE Black Sea 20
GR Mediterranean Sea 67
HR Mediterranean Sea 7
IE North-east Atlantic Ocean 42
IL Mediterranean Sea 3
IT Mediterranean Sea 494
Unknown 11
Int Mediterranean Sea 1
North-east Atlantic Ocean 2
Unknown 20
LB Mediterranean Sea 1
MA Mediterranean Sea 3
ME Mediterranean Sea 1
MT Mediterranean Sea 2
NL North-east Atlantic Ocean 10
NO North-east Atlantic Ocean 2
PT North-east Atlantic Ocean 25
RO Black Sea 145
RU Baltic Sea 9
Black Sea 11
Unknown 1
SE Baltic Sea 2
North-east Atlantic Ocean 3
SI Mediterranean Sea 77
TN Mediterranean Sea 4
TR Black Sea 9
Mediterranean Sea 11
UA Black Sea 6
UK North-east Atlantic Ocean 55
ZA Unknown 4
NaN Baltic Sea 8
Black Sea 3
Mediterranean Sea 58
North-east Atlantic Ocean 79
Unknown 101
dtype: int64
# Previewing the litter-tally columns (G1 through G213) before filling NaNs
marine_data.loc[:,'G1':'G213']
| G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | G11 | G12 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | NaN | NaN | 1.0 | NaN | 8.0 | 31.0 | 2.0 | 4.0 | NaN | 1.0 | ... | 2.0 | 4.0 | NaN | 3.0 | NaN | NaN | 3.0 | NaN | 1.0 | NaN |
| 1 | NaN | 12.0 | 1.0 | NaN | 10.0 | 9.0 | NaN | 2.0 | 3.0 | NaN | ... | 1.0 | NaN | 2.0 | NaN | NaN | NaN | 7.0 | 3.0 | NaN | NaN |
| 2 | NaN | 27.0 | 7.0 | NaN | 3.0 | 8.0 | 1.0 | 9.0 | NaN | 1.0 | ... | NaN | NaN | 5.0 | NaN | NaN | NaN | 2.0 | NaN | 2.0 | 1.0 |
| 3 | NaN | 15.0 | 60.0 | NaN | 18.0 | 46.0 | NaN | 9.0 | 10.0 | 5.0 | ... | 5.0 | NaN | 2.0 | NaN | NaN | NaN | 5.0 | 26.0 | NaN | NaN |
| 4 | NaN | 5.0 | 6.0 | NaN | 13.0 | 19.0 | 13.0 | 6.0 | 4.0 | 2.0 | ... | NaN | NaN | NaN | NaN | NaN | NaN | 3.0 | NaN | 2.0 | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | NaN | 1.0 | 23.0 | NaN | 16.0 | NaN | NaN | 1.0 | NaN | 1.0 | ... | 1.0 | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN |
| 3409 | NaN | 0.0 | 1.0 | NaN | 0.0 | 2.0 | NaN | 0.0 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3410 | NaN | 5.0 | NaN | NaN | 11.0 | 1.0 | NaN | 28.0 | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 8.0 | NaN | NaN | NaN |
| 3411 | NaN | NaN | 3.0 | NaN | 1.0 | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | 1.0 | NaN | NaN | NaN |
| 3412 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
3389 rows × 162 columns
# A missing tally means no items of that type were recorded for the event,
# so replace NaN with zero across every litter-tally column
litter_columns = marine_data.loc[:, 'G1':'G213'].columns
marine_data[litter_columns] = marine_data[litter_columns].fillna(0)
marine_data[litter_columns]
| G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | G11 | G12 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0.0 | 0.0 | 1.0 | 0.0 | 8.0 | 31.0 | 2.0 | 4.0 | 0.0 | 1.0 | ... | 2.0 | 4.0 | 0.0 | 3.0 | 0.0 | 0.0 | 3.0 | 0.0 | 1.0 | 0.0 |
| 1 | 0.0 | 12.0 | 1.0 | 0.0 | 10.0 | 9.0 | 0.0 | 2.0 | 3.0 | 0.0 | ... | 1.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 7.0 | 3.0 | 0.0 | 0.0 |
| 2 | 0.0 | 27.0 | 7.0 | 0.0 | 3.0 | 8.0 | 1.0 | 9.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 5.0 | 0.0 | 0.0 | 0.0 | 2.0 | 0.0 | 2.0 | 1.0 |
| 3 | 0.0 | 15.0 | 60.0 | 0.0 | 18.0 | 46.0 | 0.0 | 9.0 | 10.0 | 5.0 | ... | 5.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 5.0 | 26.0 | 0.0 | 0.0 |
| 4 | 0.0 | 5.0 | 6.0 | 0.0 | 13.0 | 19.0 | 13.0 | 6.0 | 4.0 | 2.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | 0.0 | 2.0 | 0.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 3408 | 0.0 | 1.0 | 23.0 | 0.0 | 16.0 | 0.0 | 0.0 | 1.0 | 0.0 | 1.0 | ... | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 |
| 3409 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 3410 | 0.0 | 5.0 | 0.0 | 0.0 | 11.0 | 1.0 | 0.0 | 28.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 8.0 | 0.0 | 0.0 | 0.0 |
| 3411 | 0.0 | 0.0 | 3.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
| 3412 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
3389 rows × 162 columns
# Checking the number of NaN values in each column of the marine_data dataframe after NaN fill
# All G-columns should now report zero missing values
post_fill_nans = marine_data.isna().sum()
print(post_fill_nans.to_string())
CommunityName 0 BeachName 0 BeachCountrycode 249 BeachRegionalSea 0 BeachLength_m 0 BeachLocation 1729 BeachType 1707 EventDate 0 EventType 0 NatRef 3275 lon_x1 0 lat_y1 0 lon_x2 0 lat_y2 0 G1 0 G3 0 G4 0 G5 0 G7 0 G8 0 G9 0 G10 0 G11 0 G12 0 G13 0 G14 0 G15 0 G16 0 G17 0 G18 0 G19 0 G21 0 G22 0 G23 0 G24 0 G25 0 G26 0 G27 0 G28 0 G29 0 G30 0 G31 0 G32 0 G33 0 G34 0 G35 0 G36 0 G37 0 G40 0 G41 0 G42 0 G43 0 G44 0 G45 0 G46 0 G47 0 G49 0 G50 0 G52 0 G53 0 G54 0 G56 0 G57 0 G58 0 G59 0 G60 0 G62 0 G63 0 G64 0 G65 0 G66 0 G67 0 G68 0 G69 0 G70 0 G71 0 G72 0 G73 0 G76 0 G77 0 G79 0 G80 0 G82 0 G83 0 G84 0 G85 0 G86 0 G87 0 G88 0 G89 0 G90 0 G91 0 G92 0 G93 0 G95 0 G96 0 G97 0 G98 0 G99 0 G100 0 G101 0 G102 0 G124 0 G125 0 G126 0 G127 0 G128 0 G129 0 G130 0 G131 0 G132 0 G133 0 G134 0 G137 0 G138 0 G139 0 G140 0 G141 0 G142 0 G143 0 G144 0 G145 0 G147 0 G148 0 G150 0 G151 0 G152 0 G153 0 G154 0 G155 0 G156 0 G158 0 G159 0 G160 0 G161 0 G162 0 G163 0 G164 0 G165 0 G166 0 G167 0 G171 0 G172 0 G174 0 G175 0 G176 0 G177 0 G178 0 G179 0 G180 0 G181 0 G182 0 G184 0 G186 0 G187 0 G188 0 G189 0 G190 0 G191 0 G193 0 G194 0 G195 0 G198 0 G199 0 G200 0 G201 0 G202 0 G203 0 G204 0 G205 0 G206 0 G207 0 G208 0 G210 0 G211 0 G213 0
# Number of distinct communities that contributed survey events
marine_data['CommunityName'].nunique()
54
# Breakdown of events by type (Cleanup vs Monitoring)
marine_data['EventType'].value_counts()
Cleanup 2599 Monitoring 790 Name: EventType, dtype: int64
# Event counts per regional sea, split by event type
marine_data.groupby('EventType')['BeachRegionalSea'].value_counts()
EventType BeachRegionalSea
Cleanup Mediterranean Sea 1502
North-east Atlantic Ocean 655
Black Sea 170
Unknown 138
Baltic Sea 134
Monitoring Mediterranean Sea 478
North-east Atlantic Ocean 155
Black Sea 98
Baltic Sea 46
Unknown 13
Name: BeachRegionalSea, dtype: int64
# Creating dataframe with only generalcode tally totals in marine_data dataframe
# Column-wise sums of every litter-tally column; axis=0 and skipna=True are the
# pandas defaults, so they are omitted here
code_totals = marine_data.loc[:, 'G1':'G213'].sum().reset_index()
code_totals
| index | 0 | |
|---|---|---|
| 0 | G1 | 3793.0 |
| 1 | G3 | 84380.0 |
| 2 | G4 | 22745.0 |
| 3 | G5 | 4628.0 |
| 4 | G7 | 33861.0 |
| ... | ... | ... |
| 157 | G207 | 350.0 |
| 158 | G208 | 62785.0 |
| 159 | G210 | 8471.0 |
| 160 | G211 | 1306.0 |
| 161 | G213 | 14184.0 |
162 rows × 2 columns
# Changing the column names of the code_totals dataframe
# reset_index() produced columns 'index' and 0 — give them meaningful names
code_totals = code_totals.rename(columns={"index": "generalcode", 0: "total"})
code_totals
| generalcode | total | |
|---|---|---|
| 0 | G1 | 3793.0 |
| 1 | G3 | 84380.0 |
| 2 | G4 | 22745.0 |
| 3 | G5 | 4628.0 |
| 4 | G7 | 33861.0 |
| ... | ... | ... |
| 157 | G207 | 350.0 |
| 158 | G208 | 62785.0 |
| 159 | G210 | 8471.0 |
| 160 | G211 | 1306.0 |
| 161 | G213 | 14184.0 |
162 rows × 2 columns
# Creating a dataframe merging the marine_meta and code_totals dataframes together.
# The join key and join type are made explicit ('generalcode', inner) rather than
# relying on pandas inferring the overlapping column — same result today, but
# robust if either frame ever gains another shared column name.
sub_category_totals = pd.merge(marine_meta, code_totals, on='generalcode', how='inner')
sub_category_totals
| generalcode | category | generalname | total | |
|---|---|---|---|---|
| 0 | G1 | Plastic | 4/6-pack yokes, six-pack rings | 3793.0 |
| 1 | G3 | Plastic | Shopping Bags incl. pieces | 84380.0 |
| 2 | G4 | Plastic | Small plastic bags, e.g. freezer bags incl. pi... | 22745.0 |
| 3 | G5 | Plastic | Plastic bags collective role; what remains fro... | 4628.0 |
| 4 | G7 | Plastic | Drink bottles <=0.5l | 33861.0 |
| ... | ... | ... | ... | ... |
| 157 | G207 | Glass/ceramics | Octopus pots | 350.0 |
| 158 | G208 | Glass/ceramics | Glass or ceramic fragments > 2.5 cm | 62785.0 |
| 159 | G210 | Glass/ceramics | Other glass items | 8471.0 |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... | 1306.0 |
| 161 | G213 | Chemicals | Paraffin/Wax | 14184.0 |
162 rows × 4 columns
# Creating a plotly table using the Category Totals dataframe
# Alternating row colours improve readability of the 162-row table
row_color1='white'
row_color2='lightsteelblue'
sub_category_fig = go.Figure(data=[go.Table(
# Header row: one label per column, grey background
header=dict(values=[['General Code'],['Category'],['General Name'],['Totals']],
fill_color='lightgrey',
line_color='darkslategray',
align='left'),
# Body cells: one list per column; fill_color cycles white/lightsteelblue per row
cells=dict(values=[sub_category_totals.generalcode, sub_category_totals.category,
sub_category_totals.generalname, sub_category_totals.total],
fill_color=[[row_color1,row_color2]*81],# 81 is half the number of rows in sub_category_totals dataframe
line_color='darkslategray',
align='left'))
])
sub_category_fig.update_layout(
showlegend=False,
title_text="<b>Marine Litter Sub-Category Totals (Itemized)</b>"
)
sub_category_fig.show()
# Create dataframe with category totals
category_totals = sub_category_totals.groupby('category').agg({'total': 'sum'}).reset_index()
# Each category's share of all litter collected, rounded to two decimal places
share_pct = category_totals['total'] / category_totals['total'].sum() * 100
category_totals['% of Litter Collected'] = share_pct.round(2)
category_totals
| category | total | % of Litter Collected | |
|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.69 |
| 1 | Cloth/textile | 29887.0 | 1.45 |
| 2 | Glass/ceramics | 111339.0 | 5.40 |
| 3 | Metal | 81393.0 | 3.95 |
| 4 | Paper/Cardboard | 42871.0 | 2.08 |
| 5 | Plastic | 1747827.0 | 84.84 |
| 6 | Processed/worked wood | 18899.0 | 0.92 |
| 7 | Rubber | 12422.0 | 0.60 |
| 8 | unidentified | 1306.0 | 0.06 |
# Summary "infographic": a borderless white table whose four cells act as
# large-font headline panels rather than tabular data
category_fig = go.Figure(data=[go.Table(
columnorder = [1,2],
columnwidth = [80,70],
header = dict(
values = [['<b>Plastic</b> made up <b>84.84%</b> <br>of all<b> Litter Collected</b>'],
['<b>54</b> communities participated']],
line_color='white',
fill_color='white',
align='left',
font=dict(color='#0000FF',size=35),
height=40),
cells=dict(values=[['<b>2599</b> Cleanup events'],
['<b>790</b> Monitoring events']],
line_color='white',
fill_color='white',
align='left',
font=dict(color='#0000FF',size=35),
height=40,)
)
])
category_fig.update_layout(width=1085, height=450)
category_fig.show()
# For each category, keep only its single highest-total litter item:
# sort items within each category by total (largest first), then take the top row
ranked = sub_category_totals.sort_values(['category', 'total'], ascending=[True, False])
top_litter_items = ranked.groupby('category').head(1)
top_litter_items
| generalcode | category | generalname | total | |
|---|---|---|---|---|
| 161 | G213 | Chemicals | Paraffin/Wax | 14184.0 |
| 107 | G145 | Cloth/textile | Other textiles (incl. rags) | 13834.0 |
| 158 | G208 | Glass/ceramics | Glass or ceramic fragments > 2.5 cm | 62785.0 |
| 133 | G178 | Metal | Bottle caps, lids & pull tabs | 26295.0 |
| 117 | G158 | Paper/Cardboard | Other paper items | 11943.0 |
| 23 | G27 | Plastic | Cigarette butts and filters | 502228.0 |
| 127 | G171 | Processed/worked wood | Other wood < 50 cm | 5931.0 |
| 89 | G125 | Rubber | Balloons and balloon sticks | 4490.0 |
| 160 | G211 | unidentified | Other medical items (swabs, bandaging, adhesiv... | 1306.0 |
# Re-displaying the category totals for reference before merging in the top items
category_totals
| category | total | % of Litter Collected | |
|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.69 |
| 1 | Cloth/textile | 29887.0 | 1.45 |
| 2 | Glass/ceramics | 111339.0 | 5.40 |
| 3 | Metal | 81393.0 | 3.95 |
| 4 | Paper/Cardboard | 42871.0 | 2.08 |
| 5 | Plastic | 1747827.0 | 84.84 |
| 6 | Processed/worked wood | 18899.0 | 0.92 |
| 7 | Rubber | 12422.0 | 0.60 |
| 8 | unidentified | 1306.0 | 0.06 |
# Merging category totals with top litter items by category, then renaming the
# merge-suffixed columns to meaningful names
category_totals = pd.merge(category_totals, top_litter_items, on='category').rename(
    columns={'total_x': 'litter_totals',
             'generalname': 'top_litter_item',
             'total_y': 'top_litter_totals'})
# Share of each category's litter contributed by its single top item, as a percentage
top_item_share = category_totals['top_litter_totals'] / category_totals['litter_totals'] * 100
category_totals['% of Litter Collected (Category)'] = top_item_share.round(decimals=2)
category_totals
| category | litter_totals | % of Litter Collected | generalcode | top_litter_item | top_litter_totals | % of Litter Collected (Category) | |
|---|---|---|---|---|---|---|---|
| 0 | Chemicals | 14184.0 | 0.69 | G213 | Paraffin/Wax | 14184.0 | 100.00 |
| 1 | Cloth/textile | 29887.0 | 1.45 | G145 | Other textiles (incl. rags) | 13834.0 | 46.29 |
| 2 | Glass/ceramics | 111339.0 | 5.40 | G208 | Glass or ceramic fragments > 2.5 cm | 62785.0 | 56.39 |
| 3 | Metal | 81393.0 | 3.95 | G178 | Bottle caps, lids & pull tabs | 26295.0 | 32.31 |
| 4 | Paper/Cardboard | 42871.0 | 2.08 | G158 | Other paper items | 11943.0 | 27.86 |
| 5 | Plastic | 1747827.0 | 84.84 | G27 | Cigarette butts and filters | 502228.0 | 28.73 |
| 6 | Processed/worked wood | 18899.0 | 0.92 | G171 | Other wood < 50 cm | 5931.0 | 31.38 |
| 7 | Rubber | 12422.0 | 0.60 | G125 | Balloons and balloon sticks | 4490.0 | 36.15 |
| 8 | unidentified | 1306.0 | 0.06 | G211 | Other medical items (swabs, bandaging, adhesiv... | 1306.0 | 100.00 |
# Creating a plotly table using the category_totals dataframe
category_totals_fig = go.Figure(data=[go.Table(
header=dict(values=[['Category'],['Litter Totals'],['% of Litter Collected'],['Top Litter Item'],
['Item Totals'],['% of Litter Collected (Category)']],
fill_color='lightgrey',
line_color='darkslategray',
align='left'),
cells=dict(values=[category_totals.category, category_totals.litter_totals, category_totals['% of Litter Collected'],
category_totals.top_litter_item, category_totals.top_litter_totals,
category_totals['% of Litter Collected (Category)']],
# NOTE(review): [white, lightsteelblue]*5 yields 10 colour entries for 9 rows —
# plotly appears to ignore the extra entry, but *5 assumes at most 10 rows
fill_color=[[row_color1,row_color2]*5],
line_color='darkslategray',
align='left'))
])
category_totals_fig.update_layout(
showlegend=False,height=650,
title_text="<b>Category Totals</b>")
category_totals_fig.show()
# Summing each litter sub-category (column-wise: all G1's, G2's, etc.) together grouping by oceanbody and year
# The grouping keys are Series taken from marine_data itself; they align with the
# G-column slice on the shared row index, producing one row per
# (BeachRegionalSea, event year) pair after reset_index()
litter_totals = marine_data.loc[:,'G1':'G213'].groupby([marine_data.BeachRegionalSea,
marine_data.EventDate.dt.year]).sum().reset_index()
litter_totals
| BeachRegionalSea | EventDate | G1 | G3 | G4 | G5 | G7 | G8 | G9 | G10 | ... | G202 | G203 | G204 | G205 | G206 | G207 | G208 | G210 | G211 | G213 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Baltic Sea | 2013 | 19.0 | 56.0 | 42.0 | 3.0 | 25.0 | 61.0 | 9.0 | 173.0 | ... | 0.0 | 0.0 | 6.0 | 0.0 | 0.0 | 0.0 | 5.0 | 0.0 | 4.0 | 0.0 |
| 1 | Baltic Sea | 2014 | 73.0 | 32.0 | 261.0 | 0.0 | 44.0 | 25.0 | 3.0 | 46.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 16.0 | 0.0 | 0.0 | 1.0 |
| 2 | Baltic Sea | 2015 | 0.0 | 1027.0 | 22.0 | 0.0 | 36.0 | 23.0 | 2.0 | 60.0 | ... | 0.0 | 0.0 | 44.0 | 0.0 | 0.0 | 0.0 | 43.0 | 0.0 | 1.0 | 1.0 |
| 3 | Baltic Sea | 2016 | 0.0 | 78.0 | 56.0 | 50.0 | 26.0 | 16.0 | 1.0 | 35.0 | ... | 1.0 | 1.0 | 187.0 | 0.0 | 0.0 | 0.0 | 43.0 | 14.0 | 5.0 | 9135.0 |
| 4 | Baltic Sea | 2017 | 1.0 | 73.0 | 281.0 | 197.0 | 64.0 | 37.0 | 0.0 | 82.0 | ... | 1.0 | 2.0 | 205.0 | 0.0 | 0.0 | 5.0 | 3784.0 | 15.0 | 46.0 | 31.0 |
| 5 | Baltic Sea | 2018 | 0.0 | 54.0 | 78.0 | 58.0 | 30.0 | 12.0 | 1.0 | 37.0 | ... | 1.0 | 0.0 | 188.0 | 0.0 | 0.0 | 0.0 | 625.0 | 2.0 | 1.0 | 127.0 |
| 6 | Baltic Sea | 2019 | 1.0 | 34.0 | 190.0 | 117.0 | 23.0 | 16.0 | 9.0 | 52.0 | ... | 0.0 | 0.0 | 42.0 | 0.0 | 0.0 | 0.0 | 44.0 | 1.0 | 22.0 | 4.0 |
| 7 | Baltic Sea | 2020 | 0.0 | 14.0 | 61.0 | 0.0 | 5.0 | 4.0 | 0.0 | 9.0 | ... | 0.0 | 0.0 | 21.0 | 0.0 | 0.0 | 0.0 | 13.0 | 0.0 | 0.0 | 10.0 |
| 8 | Black Sea | 2014 | 1.0 | 87.0 | 636.0 | 14.0 | 98.0 | 134.0 | 1.0 | 12.0 | ... | 1.0 | 0.0 | 79.0 | 0.0 | 0.0 | 0.0 | 78.0 | 73.0 | 2.0 | 0.0 |
| 9 | Black Sea | 2015 | 7.0 | 672.0 | 1055.0 | 20.0 | 1346.0 | 1472.0 | 54.0 | 434.0 | ... | 29.0 | 15.0 | 111.0 | 7.0 | 0.0 | 0.0 | 469.0 | 188.0 | 23.0 | 1.0 |
| 10 | Black Sea | 2016 | 29.0 | 165.0 | 1396.0 | 38.0 | 442.0 | 629.0 | 55.0 | 171.0 | ... | 16.0 | 11.0 | 773.0 | 5.0 | 0.0 | 0.0 | 52.0 | 7.0 | 29.0 | 2.0 |
| 11 | Black Sea | 2017 | 0.0 | 126.0 | 1504.0 | 181.0 | 142.0 | 115.0 | 4.0 | 45.0 | ... | 1.0 | 28.0 | 1014.0 | 0.0 | 0.0 | 0.0 | 61.0 | 7.0 | 8.0 | 1.0 |
| 12 | Black Sea | 2018 | 8.0 | 198.0 | 1532.0 | 103.0 | 432.0 | 496.0 | 76.0 | 214.0 | ... | 2.0 | 25.0 | 105.0 | 2.0 | 1.0 | 0.0 | 551.0 | 3.0 | 19.0 | 8.0 |
| 13 | Black Sea | 2019 | 6.0 | 817.0 | 1926.0 | 121.0 | 1205.0 | 715.0 | 206.0 | 397.0 | ... | 23.0 | 5.0 | 672.0 | 6.0 | 0.0 | 0.0 | 2128.0 | 39.0 | 72.0 | 6.0 |
| 14 | Black Sea | 2020 | 4.0 | 505.0 | 1151.0 | 130.0 | 352.0 | 528.0 | 94.0 | 204.0 | ... | 8.0 | 3.0 | 373.0 | 3.0 | 0.0 | 0.0 | 583.0 | 36.0 | 23.0 | 4.0 |
| 15 | Black Sea | 2021 | 0.0 | 2.0 | 6.0 | 4.0 | 21.0 | 11.0 | 0.0 | 5.0 | ... | 0.0 | 2.0 | 3.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | 0.0 |
| 16 | Mediterranean Sea | 2013 | 9.0 | 173.0 | 457.0 | 2.0 | 116.0 | 12.0 | 6.0 | 359.0 | ... | 0.0 | 0.0 | 23.0 | 0.0 | 0.0 | 0.0 | 263.0 | 33.0 | 5.0 | 0.0 |
| 17 | Mediterranean Sea | 2014 | 34.0 | 973.0 | 172.0 | 11.0 | 965.0 | 903.0 | 139.0 | 590.0 | ... | 5.0 | 8.0 | 494.0 | 0.0 | 0.0 | 0.0 | 1770.0 | 75.0 | 9.0 | 2.0 |
| 18 | Mediterranean Sea | 2015 | 35.0 | 6440.0 | 1381.0 | 29.0 | 4012.0 | 2793.0 | 295.0 | 2942.0 | ... | 10.0 | 1.0 | 65.0 | 0.0 | 0.0 | 0.0 | 3593.0 | 83.0 | 28.0 | 0.0 |
| 19 | Mediterranean Sea | 2016 | 139.0 | 1543.0 | 340.0 | 35.0 | 3204.0 | 3680.0 | 442.0 | 798.0 | ... | 218.0 | 2.0 | 292.0 | 0.0 | 0.0 | 0.0 | 1126.0 | 670.0 | 65.0 | 0.0 |
| 20 | Mediterranean Sea | 2017 | 1514.0 | 3812.0 | 3388.0 | 1609.0 | 4519.0 | 4077.0 | 1176.0 | 3334.0 | ... | 61.0 | 307.0 | 3190.0 | 20.0 | 178.0 | 176.0 | 13374.0 | 364.0 | 357.0 | 154.0 |
| 21 | Mediterranean Sea | 2018 | 1389.0 | 3059.0 | 2512.0 | 1730.0 | 3827.0 | 3038.0 | 1041.0 | 2389.0 | ... | 45.0 | 205.0 | 2117.0 | 16.0 | 169.0 | 168.0 | 6928.0 | 1026.0 | 287.0 | 16.0 |
| 22 | Mediterranean Sea | 2019 | 28.0 | 11836.0 | 53.0 | 3.0 | 2457.0 | 2343.0 | 32.0 | 3718.0 | ... | 5.0 | 1.0 | 36.0 | 0.0 | 1.0 | 0.0 | 6181.0 | 0.0 | 10.0 | 0.0 |
| 23 | Mediterranean Sea | 2020 | 38.0 | 184.0 | 58.0 | 24.0 | 93.0 | 129.0 | 0.0 | 25.0 | ... | 0.0 | 0.0 | 10.0 | 0.0 | 0.0 | 0.0 | 39.0 | 2.0 | 80.0 | 0.0 |
| 24 | Mediterranean Sea | 2021 | 0.0 | 60.0 | 0.0 | 0.0 | 21.0 | 7.0 | 0.0 | 6.0 | ... | 0.0 | 0.0 | 36.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 |
| 25 | North-east Atlantic Ocean | 2013 | 4.0 | 238.0 | 4.0 | 0.0 | 103.0 | 0.0 | 0.0 | 135.0 | ... | 1.0 | 0.0 | 22.0 | 0.0 | 0.0 | 0.0 | 23.0 | 0.0 | 2.0 | 24.0 |
| 26 | North-east Atlantic Ocean | 2014 | 357.0 | 867.0 | 566.0 | 3.0 | 697.0 | 308.0 | 7.0 | 1156.0 | ... | 0.0 | 7.0 | 22.0 | 0.0 | 0.0 | 0.0 | 520.0 | 49.0 | 33.0 | 3.0 |
| 27 | North-east Atlantic Ocean | 2015 | 31.0 | 8026.0 | 862.0 | 1.0 | 2599.0 | 1729.0 | 89.0 | 3203.0 | ... | 10.0 | 1.0 | 84.0 | 0.0 | 0.0 | 0.0 | 6173.0 | 198.0 | 17.0 | 91.0 |
| 28 | North-east Atlantic Ocean | 2016 | 23.0 | 1463.0 | 671.0 | 8.0 | 1137.0 | 494.0 | 33.0 | 653.0 | ... | 7.0 | 3.0 | 195.0 | 1.0 | 0.0 | 0.0 | 1304.0 | 198.0 | 49.0 | 1929.0 |
| 29 | North-east Atlantic Ocean | 2017 | 5.0 | 755.0 | 352.0 | 94.0 | 662.0 | 58.0 | 180.0 | 525.0 | ... | 4.0 | 2.0 | 278.0 | 0.0 | 0.0 | 0.0 | 358.0 | 130.0 | 22.0 | 2290.0 |
| 30 | North-east Atlantic Ocean | 2018 | 21.0 | 1079.0 | 581.0 | 1.0 | 723.0 | 48.0 | 81.0 | 430.0 | ... | 2.0 | 4.0 | 723.0 | 0.0 | 0.0 | 0.0 | 449.0 | 2019.0 | 44.0 | 66.0 |
| 31 | North-east Atlantic Ocean | 2019 | 7.0 | 32013.0 | 667.0 | 20.0 | 1421.0 | 1991.0 | 49.0 | 12690.0 | ... | 5.0 | 40.0 | 248.0 | 1.0 | 1.0 | 1.0 | 9607.0 | 2744.0 | 17.0 | 135.0 |
| 32 | North-east Atlantic Ocean | 2020 | 3.0 | 62.0 | 116.0 | 1.0 | 17.0 | 9.0 | 9.0 | 87.0 | ... | 2.0 | 21.0 | 145.0 | 1.0 | 0.0 | 0.0 | 94.0 | 6.0 | 1.0 | 141.0 |
| 33 | Unknown | 2014 | 0.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | 0.0 | 1.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| 34 | Unknown | 2015 | 0.0 | 2942.0 | 217.0 | 0.0 | 900.0 | 370.0 | 0.0 | 227.0 | ... | 0.0 | 0.0 | 2.0 | 0.0 | 0.0 | 0.0 | 476.0 | 0.0 | 0.0 | 0.0 |
| 35 | Unknown | 2016 | 0.0 | 433.0 | 0.0 | 0.0 | 51.0 | 0.0 | 12.0 | 48.0 | ... | 10.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 470.0 | 0.0 | 0.0 |
| 36 | Unknown | 2017 | 0.0 | 11.0 | 45.0 | 0.0 | 3.0 | 2.0 | 0.0 | 4.0 | ... | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 | 0.0 | 57.0 | 0.0 | 0.0 | 2.0 |
| 37 | Unknown | 2018 | 7.0 | 85.0 | 96.0 | 21.0 | 310.0 | 404.0 | 54.0 | 98.0 | ... | 0.0 | 3.0 | 90.0 | 0.0 | 0.0 | 0.0 | 314.0 | 19.0 | 20.0 | 0.0 |
| 38 | Unknown | 2019 | 0.0 | 4366.0 | 5.0 | 0.0 | 1312.0 | 2030.0 | 0.0 | 6442.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1621.0 | 0.0 | 0.0 | 0.0 |
| 39 | Unknown | 2020 | 0.0 | 19.0 | 5.0 | 0.0 | 420.0 | 3516.0 | 28.0 | 16.0 | ... | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 20.0 | 0.0 | 0.0 | 0.0 |
40 rows × 164 columns
# Collapse the per-event litter counts into one total per row:
# keep the BeachRegionalSea/EventDate key columns and pair them with the
# row-wise sum over every litter sub-category column (G1 through G213).
category_sum = litter_totals.loc[:, 'G1':'G213'].sum(axis=1)
litter_totals = pd.concat(
    [litter_totals.loc[:, 'BeachRegionalSea':'EventDate'], category_sum],
    axis=1,
).rename(columns={'BeachRegionalSea': 'Oceanbody',
                  'EventDate': 'Year',
                  0: 'LitterTotals'})
# Sanity-check the reshaped dataframe
litter_totals.head()
| Oceanbody | Year | LitterTotals | |
|---|---|---|---|
| 0 | Baltic Sea | 2013 | 2246.0 |
| 1 | Baltic Sea | 2014 | 2606.0 |
| 2 | Baltic Sea | 2015 | 2405.0 |
| 3 | Baltic Sea | 2016 | 10830.0 |
| 4 | Baltic Sea | 2017 | 28054.0 |
# Stacked bar chart of yearly litter totals, one trace (colour) per ocean body
litter_graph = px.bar(litter_totals, x='Year', y='LitterTotals', color='Oceanbody',
                      title='Yearly Collection Totals',
                      labels={'LitterTotals': 'Litter Collected (Sum)', 'Oceanbody': 'Ocean/Sea'},
                      color_discrete_map={'Baltic Sea': '#007FFF',
                                          'Black Sea': '#2A52BE',
                                          'Mediterranean Sea': '#0000FF',
                                          'North-east Atlantic Ocean': '#3F3F68',
                                          'Unknown': '#191970'})

# Build the toggle buttons from the traces actually present in the figure so the
# visibility masks and titles always line up with plotly's trace order.
# (Hand-written [True, False, ...] lists silently point at the wrong trace if a
# category is missing from the data or px emits them in a different order.)
trace_names = [trace.name for trace in litter_graph.data]
buttons = [dict(label='All',
                method='update',
                args=[{'visible': [True] * len(trace_names)},
                      {'title': '<b>Litter Collection Totals</b>',
                       'showlegend': True}])]
for i, name in enumerate(trace_names):
    buttons.append(dict(label=name,
                        method='update',
                        args=[{'visible': [j == i for j in range(len(trace_names))]},
                              {'title': f'<b>Litter Collection Totals - {name}</b>',
                               'showlegend': False}]))

litter_graph.update_layout(xaxis_title=None,
                           updatemenus=[go.layout.Updatemenu(
                               active=0,
                               buttons=buttons,
                               type="buttons",
                               direction="right",
                               pad={"r": 25, "t": 25},
                               x=0.5,
                               y=1.02,
                               showactive=True,
                               xanchor="center",
                               yanchor="bottom",
                           )])
litter_graph.show()
# Count litter events (Cleanup/Monitoring) per month/year, sea region and
# coordinate pair: one row per (Month, Year, EventType, BeachRegionalSea,
# lon, lat) group with the number of events recorded for it.
# NOTE(review): this assumes EventDate was already converted to datetime
# earlier in the notebook (the raw CSV stores it as an int like 20160310) —
# the .dt accessor raises otherwise; confirm against the cleaning cells.
event_type_data = (
    marine_data
    # Rename the two date-derived series upfront: both would otherwise be
    # named 'EventDate', which is what forced reset_index(allow_duplicates=True)
    # and the fragile positional columns.values[i] = ... renaming before.
    .groupby([marine_data.EventDate.dt.month.rename('Month'),
              marine_data.EventDate.dt.year.rename('Year'),
              'EventType', 'BeachRegionalSea', 'lon_x2', 'lat_y2'])
    # .size() == count of rows per group; equivalent to counting 'EventType'
    # since EventType is a grouping key and therefore non-null within groups.
    .size()
    .reset_index(name='NumofEvents(Count)')
    # Rename by label, not by position, so a column reorder can't mislabel data.
    .rename(columns={'lon_x2': 'lon_x1', 'lat_y2': 'lat_y1'})
)
event_type_data.head(15)
| Month | Year | EventType | BeachRegionalSea | lon_x1 | lat_y1 | NumofEvents(Count) | |
|---|---|---|---|---|---|---|---|
| 0 | 1 | 2014 | Cleanup | Mediterranean Sea | 9.886536 | 44.083107 | 1 |
| 1 | 1 | 2014 | Cleanup | North-east Atlantic Ocean | -1.320040 | 44.112300 | 1 |
| 2 | 1 | 2015 | Cleanup | Black Sea | 27.668371 | 42.409241 | 1 |
| 3 | 1 | 2015 | Cleanup | Black Sea | 27.709501 | 42.366164 | 2 |
| 4 | 1 | 2015 | Cleanup | Black Sea | 28.589187 | 43.809237 | 1 |
| 5 | 1 | 2015 | Cleanup | Black Sea | 28.589472 | 43.809385 | 1 |
| 6 | 1 | 2015 | Cleanup | Black Sea | 28.592534 | 43.829550 | 1 |
| 7 | 1 | 2015 | Cleanup | Black Sea | 28.592712 | 43.827780 | 1 |
| 8 | 1 | 2015 | Cleanup | Black Sea | 28.593001 | 43.827915 | 1 |
| 9 | 1 | 2015 | Cleanup | Black Sea | 28.629170 | 44.230505 | 1 |
| 10 | 1 | 2015 | Cleanup | Mediterranean Sea | 2.555432 | 39.699948 | 1 |
| 11 | 1 | 2015 | Cleanup | Mediterranean Sea | 10.265680 | 43.790288 | 1 |
| 12 | 1 | 2015 | Cleanup | Mediterranean Sea | 26.968493 | 37.698421 | 1 |
| 13 | 1 | 2016 | Cleanup | Mediterranean Sea | 10.026400 | 44.042900 | 1 |
| 14 | 1 | 2016 | Cleanup | Mediterranean Sea | 23.904980 | 37.736122 | 2 |
# Creating a map visualization displaying the locations of litter events;
# marker size encodes how many events were recorded at each coordinate.
event_map = px.scatter_geo(event_type_data, lat='lat_y1', lon='lon_x1', color='EventType',
                           labels={'EventType': 'Event Type'}, color_discrete_sequence=['blue', 'red'],
                           size="NumofEvents(Count)", height=600, hover_name='EventType',
                           hover_data=['Month', 'Year'],
                           projection="natural earth", title='Litter Event Locations')

# Derive the toggle buttons from the traces present in the figure so the
# visibility masks and subtitles always match the actual trace order, and so
# each button's label and title agree (the hand-written version labelled the
# button 'Cleanup' but titled the view 'Cleaning').
trace_names = [trace.name for trace in event_map.data]
buttons = [dict(label='Both',
                method='update',
                args=[{'visible': [True] * len(trace_names)},
                      {'title': '<b>Litter Event Locations</b>',
                       'showlegend': True}])]
for i, name in enumerate(trace_names):
    buttons.append(dict(label=name,
                        method='update',
                        args=[{'visible': [j == i for j in range(len(trace_names))]},
                              {'title': f'<b>Litter Event Locations - {name}</b>',
                               'showlegend': True}]))

# No xaxis_title here: a geo figure has no cartesian x-axis, so setting it
# (as the bar chart does) has no effect on this plot.
event_map.update_layout(updatemenus=[go.layout.Updatemenu(
    active=0,
    buttons=buttons,
    type="buttons",
    direction="right",
    pad={"r": 25, "t": 25},
    x=0.5,
    y=1.02,
    showactive=True,
    xanchor="center",
    yanchor="bottom",
)])
event_map.update_geos(showrivers=True, showlakes=True, rivercolor='slateblue',
                      lakecolor='slateblue', showcountries=True)
event_map.show()